1 Load required libraries
2 Variables
## [1] "treatment"
## [1] "batch"
## [1] "CRISPRi_CONTROL"
## [1] 1
## [1] 0.05
## [1] "treatment_TEAD2_vs_CRISPRi_CONTROL"
## [1] FALSE
3 Data
## ENCSR074FBG_1 ENCSR074FBG_2 ENCSR095PIC_1 ENCSR095PIC_2
## ENSG00000000003 18 8 2 2
## ENSG00000000005 0 2 0 2
## ENSG00000000419 1693 1732 702 1070
## ENSG00000000457 871 857 538 757
## ENSG00000000460 1379 1300 593 920
## ENSG00000000938 2 10 5 0
## [1] 58062 4
5 Defining formula
form <- NULL
if(is.null(covariates)){
if(str_length(condition) > 0) form <- as.formula(paste0("~ ", condition))
} else if(str_length(condition) > 0 & (length(covariates) == 0)) {
form <- as.formula(paste0("~ ", condition))
} else if(length(covariates) > 0) {
form <- as.formula(paste0("~ ",paste(covariates,collapse = "+")," + ", condition))
}
as.character(form)## [1] "~" "batch + treatment"
6 Differential Expression Analysis (DEA)
res <- NULL
# Samples must be in the same order as in the metadata
metadata <- metadata[match(colnames(raw_cts), metadata %>% pull(1)),]
# To perform the DEA samples cannot be labled as NA we will remove it
table(metadata[,condition])##
## CRISPRi_CONTROL TEAD2
## 2 2
keep.samples <- !is.na(metadata[,condition,drop = T])
# Perform DEA using defined formula
dds <- DESeqDataSetFromMatrix(countData = raw_cts[,keep.samples],
colData = metadata[keep.samples,],
design = form)## converting counts to integer mode
## the design formula contains one or more numeric variables with integer values,
## specifying a model with increasing fold change for higher values.
## did you mean for this to be a factor? if so, first convert
## this variable to a factor using the factor() function
# remove genes with low number of counts as suggested in DESeq2 vignette
keep <- rowSums(counts(dds)) >= 10
dds[[condition]] <- relevel(dds[[condition]], ref = reference)
dds <- dds[keep,]
res <- DESeq(dds)## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## [1] "Intercept" "batch"
## [3] "treatment_TEAD2_vs_CRISPRi_CONTROL"
6.1 Get results
if(str_length(deaSelect) == 0) {
# if not result selected get the first one
dea <- DESeq2::results(res) %>% as.data.frame
} else {
if (lfc) {
dea <- as.data.frame(lfcShrink(res,
coef = deaSelect,
type = "apeglm"))
} else {
if (log2FoldChange > 0){
dea <- DESeq2::results(res,
name = deaSelect,
lfcThreshold = log2FoldChange,
altHypothesis = "greaterAbs") %>%
as.data.frame
} else {
dea <- DESeq2::results(res, name = deaSelect) %>%
as.data.frame
}
}
}
DT::datatable(dea)6.2 Volcano plot
x.cut <- log2FoldChange
y.cut <- padj
dea$group <- "Not Significant"
dea[which(dea$padj < y.cut & dea$log2FoldChange < -x.cut ),"group"] <- "Downregulated"
dea[which(dea$padj < y.cut & dea$log2FoldChange > x.cut ),"group"] <- "Upregulated"
f <- list(
family = "Courier New, monospace",
size = 18,
color = "#7f7f7f"
)
x <- list(
title = "log2FoldChange",
titlefont = f
)
y <- list(
title = "-log10(p-value adjusted)",
titlefont = f
)
p <- plot_ly(data = dea,
x = dea$log2FoldChange,
y = -log10(dea$padj),
text = rownames(dea),
mode = "markers",
color = dea$group) %>%
layout(title = "Volcano Plot") %>%
layout(xaxis = x, yaxis = y) %>%
layout(shapes = list(list(type = 'line',
x0 = x.cut,
x1 = x.cut,
y0 = 0,
y1 = max(-log10(dea$padj),na.rm = T),
line = list(dash = 'dot', width = 1)),
list(type = 'line',
x0 = -x.cut,
x1 = -x.cut,
y0 = 0,
y1 = max(-log10(dea$padj),na.rm = T),
line = list(dash = 'dot', width = 1)),
list(type = 'line',
x0 = min(dea$log2FoldChange),
x1 = max(dea$log2FoldChange),
y0 = -log10(y.cut),
y1 = -log10(y.cut),
line = list(dash = 'dot', width = 1))
)
)
p7 Session Information
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Mojave 10.14.6
##
## Matrix products: default
## BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets methods
## [9] base
##
## other attached packages:
## [1] plyr_1.8.6 msigdbr_7.0.1
## [3] GSEABase_1.48.0 graph_1.64.0
## [5] annotate_1.64.0 XML_3.99-0.3
## [7] magrittr_1.5 enrichplot_1.6.1
## [9] org.Hs.eg.db_3.10.0 AnnotationDbi_1.48.0
## [11] clusterProfiler_3.14.3 DOSE_3.12.0
## [13] ashr_2.2-47 apeglm_1.8.0
## [15] shinyWidgets_0.5.1 shinyBS_0.61
## [17] shinythemes_1.1.2 shinydashboard_0.7.1
## [19] shinyjqui_0.3.3 plotly_4.9.2.1
## [21] shinyjs_1.1 forcats_0.5.0
## [23] stringr_1.4.0 dplyr_0.8.5
## [25] purrr_0.3.4 readr_1.3.1
## [27] tibble_3.0.1 tidyverse_1.3.0
## [29] iheatmapr_0.4.12 edgeR_3.28.1
## [31] limma_3.42.2 DESeq2_1.26.0
## [33] SummarizedExperiment_1.16.1 DelayedArray_0.12.3
## [35] BiocParallel_1.20.1 matrixStats_0.56.0
## [37] Biobase_2.46.0 GenomicRanges_1.38.0
## [39] GenomeInfoDb_1.22.1 IRanges_2.20.2
## [41] S4Vectors_0.24.4 BiocGenerics_0.32.0
## [43] rmarkdown_2.1 ggplot2_3.3.0
## [45] tidyr_1.0.2 shinycssloaders_0.3
## [47] shiny_1.4.0.2
##
## loaded via a namespace (and not attached):
## [1] tidyselect_1.0.0 RSQLite_2.2.0 htmlwidgets_1.5.1
## [4] grid_3.6.3 munsell_0.5.0 DT_0.13
## [7] withr_2.2.0 colorspace_1.4-1 GOSemSim_2.12.1
## [10] knitr_1.28 rstudioapi_0.11 urltools_1.7.3
## [13] bbmle_1.0.23.1 GenomeInfoDbData_1.2.2 mixsqp_0.3-17
## [16] polyclip_1.10-0 bit64_0.9-7 farver_2.0.3
## [19] coda_0.19-3 vctrs_0.2.4 generics_0.0.2
## [22] xfun_0.13 fastcluster_1.1.25 markdown_1.1
## [25] R6_2.4.1 graphlayouts_0.7.0 invgamma_1.1
## [28] locfit_1.5-9.4 bitops_1.0-6 fgsea_1.12.0
## [31] gridGraphics_0.5-0 assertthat_0.2.1 promises_1.1.0
## [34] scales_1.1.0 ggraph_2.0.2 nnet_7.3-14
## [37] gtable_0.3.0 tidygraph_1.1.2 rlang_0.4.5
## [40] genefilter_1.68.0 splines_3.6.3 lazyeval_0.2.2
## [43] acepack_1.4.1 broom_0.5.6 europepmc_0.3
## [46] checkmate_2.0.0 yaml_2.2.1 BiocManager_1.30.10
## [49] reshape2_1.4.4 modelr_0.1.7 crosstalk_1.1.0.1
## [52] backports_1.1.6 httpuv_1.5.2 qvalue_2.18.0
## [55] Hmisc_4.4-0 tools_3.6.3 ggplotify_0.0.5
## [58] ellipsis_0.3.0 RColorBrewer_1.1-2 ggdendro_0.1-20
## [61] ggridges_0.5.2 Rcpp_1.0.4.6 base64enc_0.1-3
## [64] progress_1.2.2 zlibbioc_1.32.0 RCurl_1.98-1.2
## [67] prettyunits_1.1.1 rpart_4.1-15 viridis_0.5.1
## [70] cowplot_1.0.0 haven_2.2.0 ggrepel_0.8.2
## [73] cluster_2.1.0 fs_1.4.1 data.table_1.12.8
## [76] DO.db_2.9 triebeard_0.3.0 reprex_0.3.0
## [79] truncnorm_1.0-8 mvtnorm_1.1-0 SQUAREM_2020.2
## [82] hms_0.5.3 mime_0.9 evaluate_0.14
## [85] xtable_1.8-4 emdbook_1.3.12 jpeg_0.1-8.1
## [88] readxl_1.3.1 gridExtra_2.3 compiler_3.6.3
## [91] bdsmatrix_1.3-4 crayon_1.3.4 htmltools_0.4.0
## [94] later_1.0.0 Formula_1.2-3 geneplotter_1.64.0
## [97] lubridate_1.7.8 DBI_1.1.0 tweenr_1.0.1
## [100] dbplyr_1.4.3 MASS_7.3-51.6 Matrix_1.2-18
## [103] cli_2.0.2 igraph_1.2.5 pkgconfig_2.0.3
## [106] prettydoc_0.3.1 rvcheck_0.1.8 numDeriv_2016.8-1.1
## [109] foreign_0.8-76 xml2_1.3.2 XVector_0.26.0
## [112] rvest_0.3.5 digest_0.6.25 cellranger_1.1.0
## [115] fastmatch_1.1-0 htmlTable_1.13.3 lifecycle_0.2.0
## [118] nlme_3.1-147 jsonlite_1.6.1 viridisLite_0.3.0
## [121] fansi_0.4.1 pillar_1.4.3 lattice_0.20-41
## [124] fastmap_1.0.1 httr_1.4.1 survival_3.1-12
## [127] GO.db_3.10.0 glue_1.4.0 png_0.1-7
## [130] bit_1.1-15.2 ggforce_0.3.1 stringi_1.4.6
## [133] blob_1.2.1 latticeExtra_0.6-29 memoise_1.1.0
## [136] irlba_2.3.3